###############################################                                     
# Cause or Effect? Turnout in Hispanic Majority-Minority Districts
#  - John A. Henderson, Jasjeet S. Sekhon, and Rocio Titiunik
#  - Forthcoming in Political Analysis
#  - Replication file for <Table XIII>
#  - April 14, 2016
###############################################

###########################################################################
#
## GENERATES TABLE XIII: CROSS-SECTION RESULTS FOR 2002 
#
###########################################################################

# Session setup: widen console output and clear the workspace, then source the
# project header file (presumably loads packages/helpers such as ks.boot --
# its contents are not visible from this script).
options(width = 150)
rm(list = ls())

path <- "/local/"
source(paste0(path, "replicationPA/funs/headers.R"))

# Fixed seed, set once before any of the analysis below runs.
set.seed(57459)

# Baseline (WH2) data; the load() is expected to create `WH2data`.
# Stata-format alternative kept for reference:
#WH2data <- read.dta(file = paste(path,"replicationPA/data/Baseline-afterMATCH-recreated.dta",sep=''))
load(file = paste0(path, "replicationPA/data/Baseline-afterMATCH-recreated.Rdata"))
dim(WH2data)

# Keep only rows flagged dfound_pair_newdata == 1 (which() drops NA flags).
WH2data <- WH2data[which(WH2data$dfound_pair_newdata == 1), ]
dim(WH2data)

# Matched data; the load() is expected to create `Sdata`.
# Stata-format alternative kept for reference:
#Sdata <-  read.dta(file = paste(path,"replicationPA/data/Matched-recreated.dta",sep=''))
load(file = paste0(path, "replicationPA/data/Matched-recreated.Rdata"))
dim(Sdata)

# Same row filter as for the baseline data.
Sdata <- Sdata[which(Sdata$dfound_pair_newdata == 1), ]
dim(Sdata)

# Derived columns for the baseline (WH2) data. Statement order is kept so the
# new columns are appended in the same order as before.

# Voting-age population split: hvap copies pop_hispanic18, nhvap is the
# remainder of vap.
WH2data$hvap  <- WH2data$pop_hispanic18
WH2data$nhvap <- WH2data$vap - WH2data$hvap

# Wider income brackets and a combined foreign-born share, each the sum of two
# existing columns (presumably 1999 household-income shares, judging by the
# column names).
WH2data$phh_income99_39less  <- WH2data$phh_income99_0to19 + WH2data$phh_income99_20to39
WH2data$phh_income99_40to74  <- WH2data$phh_income99_40to59 + WH2data$phh_income99_60to74
WH2data$phh_income99_100plus <- WH2data$phh_income99_100to199 + WH2data$phh_income99_200
WH2data$ppop_foreign         <- WH2data$ppop_foreign_naturcit + WH2data$ppop_foreign_nocit

# 1998: non-Hispanic registration count and its share of total registration.
WH2data$y98_reg_nhisp  <- WH2data$y98_reg_tot - WH2data$y98_reg_hisp
WH2data$y98_preg_nhisp <- WH2data$y98_reg_nhisp / WH2data$y98_reg_tot

# 2000-2006: non-Hispanic turnout and registration as total minus Hispanic.
WH2data$y00_turn_nhisp <- WH2data$y00_turn_tot - WH2data$y00_turn_hisp
WH2data$y00_reg_nhisp  <- WH2data$y00_reg_tot - WH2data$y00_reg_hisp

WH2data$y02_turn_nhisp <- WH2data$y02_turn_tot - WH2data$y02_turn_hisp
WH2data$y02_reg_nhisp  <- WH2data$y02_reg_tot - WH2data$y02_reg_hisp

WH2data$y04_turn_nhisp <- WH2data$y04_turn_tot - WH2data$y04_turn_hisp
WH2data$y04_reg_nhisp  <- WH2data$y04_reg_tot - WH2data$y04_reg_hisp

WH2data$y06_turn_nhisp <- WH2data$y06_turn_tot - WH2data$y06_turn_hisp
WH2data$y06_reg_nhisp  <- WH2data$y06_reg_tot - WH2data$y06_reg_hisp

# Derived columns for the matched data (Sdata). Statement order is kept so the
# new columns are appended in the same order as before.

# Voting-age population split: hvap copies pop_hispanic18, nhvap is the
# remainder of vap.
Sdata$hvap  <- Sdata$pop_hispanic18
Sdata$nhvap <- Sdata$vap - Sdata$hvap

# Wider income brackets and a combined foreign-born share, each the sum of two
# existing columns (presumably 1999 household-income shares, judging by the
# column names).
Sdata$phh_income99_39less  <- Sdata$phh_income99_0to19 + Sdata$phh_income99_20to39
Sdata$phh_income99_40to74  <- Sdata$phh_income99_40to59 + Sdata$phh_income99_60to74
Sdata$phh_income99_100plus <- Sdata$phh_income99_100to199 + Sdata$phh_income99_200
Sdata$ppop_foreign         <- Sdata$ppop_foreign_naturcit + Sdata$ppop_foreign_nocit

# 1998: non-Hispanic registration count and its share of total registration.
Sdata$y98_reg_nhisp  <- Sdata$y98_reg_tot - Sdata$y98_reg_hisp
Sdata$y98_preg_nhisp <- Sdata$y98_reg_nhisp / Sdata$y98_reg_tot

# 2000-2006: non-Hispanic turnout and registration as total minus Hispanic.
Sdata$y00_turn_nhisp <- Sdata$y00_turn_tot - Sdata$y00_turn_hisp
Sdata$y00_reg_nhisp  <- Sdata$y00_reg_tot - Sdata$y00_reg_hisp

Sdata$y02_turn_nhisp <- Sdata$y02_turn_tot - Sdata$y02_turn_hisp
Sdata$y02_reg_nhisp  <- Sdata$y02_reg_tot - Sdata$y02_reg_hisp

Sdata$y04_turn_nhisp <- Sdata$y04_turn_tot - Sdata$y04_turn_hisp
Sdata$y04_reg_nhisp  <- Sdata$y04_reg_tot - Sdata$y04_reg_hisp

Sdata$y06_turn_nhisp <- Sdata$y06_turn_tot - Sdata$y06_turn_hisp
Sdata$y06_reg_nhisp  <- Sdata$y06_reg_tot - Sdata$y06_reg_hisp

# Standardise the treatment column name to 'Tr' in both data sets. If no
# column is called 'tr' the assignment is a no-op.
names(Sdata)[names(Sdata) == "tr"] <- "Tr"
names(WH2data)[names(WH2data) == "tr"] <- "Tr"

# Row positions of treated and control units, stored per data set and reused
# by every outcome section below. TRUE/FALSE are used instead of T/F because
# T and F are ordinary variables that sourced code could have reassigned.
# which() silently drops rows whose treatment indicator is NA.

# WH2data treatment
Tr <- WH2data$Tr
m1 <- list(
  index.treated = which(Tr == TRUE),
  index.control = which(Tr == FALSE)
)
print(table(Tr))

# Sdata treatment
Tr <- Sdata$Tr
m2 <- list(
  index.treated = which(Tr == TRUE),
  index.control = which(Tr == FALSE)
)
print(table(Tr))

        
######################################################
### Results matrix
######################################################

# Results container: one row per outcome x sample (WH2 = baseline sample,
# AM = matched sample, per the row labels used when the tests are stored),
# one column per reported statistic. Cells start as NA and are filled in by
# the outcome sections that follow.
PLACcolnm <- c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")

# Four outcome labels, first with the WH2 suffix and then with the AM suffix.
PLACrownm <- paste(
  rep(
    c("Hispanic Turnout", "Hispanic Registration",
      "NonHispanic Turnout", "NonHispanic Registration"),
    times = 2
  ),
  rep(c("2002 WH2", "2002 AM"), each = 4)
)

RESPLAC <- matrix(
  NA,
  length(PLACrownm),
  length(PLACcolnm),
  dimnames = list(PLACrownm, PLACcolnm)
)


################################################################################
### Cross-sectional results (i): Hispanic Registration in 2002 (as share of HVAP) 
################################################################################

#BASELINE   
    
# Outcome on the WH2 data: 2002 Hispanic registration divided by Hispanic VAP.
# Rows with the 999999 code (apparently a missing-value sentinel) in either
# component, or a zero denominator, are set to NA.
Y <- WH2data$y02_reg_hisp / WH2data$hvap
Y[which(WH2data$hvap == 0 | WH2data$hvap == 999999 | WH2data$y02_reg_hisp == 999999)] <- NA
Tr <- WH2data$Tr == 1

# Report the share of all rows whose ratio exceeds 1, then blank those rows.
print(rbind('Percent overruns', as.numeric(sum(Y > 1, na.rm = TRUE) / length(Y))))
Y[!is.na(Y) & Y > 1] <- NA

# Stack treated rows first, then controls, using the positions stored in m1.
wh2_rows <- c(m1$index.treated, m1$index.control)
TrWH2 <- Tr[wh2_rows]
YWH2 <- Y[wh2_rows]

# Same outcome on the matched data (Sdata), with the same exclusions.
Y <- Sdata$y02_reg_hisp / Sdata$hvap
Y[which(Sdata$y02_reg_hisp == 999999 | Sdata$hvap == 999999 | Sdata$hvap == 0)] <- NA

print(rbind('Percent overruns', as.numeric(sum(Y > 1, na.rm = TRUE) / length(Y))))
Y[!is.na(Y) & Y > 1] <- NA

# Treated outcomes followed by control outcomes, with a matching indicator.
Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]
Ym <- c(Y1, Y0)
Trm <- rep(c(TRUE, FALSE), times = c(length(Y1), length(Y0)))

# ---- WH2data: unadjusted treated-vs-control comparison ----
# The t.test argument expressions are kept as written because they appear in
# the printed "data:" line.
cat("Top of results for WH2data \n")
tt <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(tt)
kb <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(kb)

RESPLAC["Hispanic Registration 2002 WH2",
        c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")] <-
  c(tt$estimate[1], tt$estimate[2], tt$p.value, kb$ks.boot.pvalue)

# ---- Matched data: same comparison on the stacked matched sample ----
cat("Top of results AFTER matching \n")
tt <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(tt)
kb <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(kb)

RESPLAC["Hispanic Registration 2002 AM",
        c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")] <-
  c(tt$estimate[1], tt$estimate[2], tt$p.value, kb$ks.boot.pvalue)


################################################################################
### Cross-sectional results (ii): Hispanic Turnout in 2002 (as share of Hispanic Registration in 2000) 
################################################################################

#BASELINE


# Outcome on the WH2 data: 2002 Hispanic turnout divided by 2000 Hispanic
# registration. Rows with the 999999 code (apparently a missing-value
# sentinel) in either component, or a zero denominator, are set to NA.
# Unlike the registration outcomes, ratios above 1 are not filtered here.
Y <- WH2data$y02_turn_hisp / WH2data$y00_reg_hisp
Y[which(WH2data$y00_reg_hisp == 0 | WH2data$y00_reg_hisp == 999999 | WH2data$y02_turn_hisp == 999999)] <- NA
Tr <- WH2data$Tr == 1

# Stack treated rows first, then controls, using the positions stored in m1.
wh2_rows <- c(m1$index.treated, m1$index.control)
TrWH2 <- Tr[wh2_rows]
YWH2 <- Y[wh2_rows]

# Same outcome on the matched data (Sdata), with the same exclusions.
Y <- Sdata$y02_turn_hisp / Sdata$y00_reg_hisp
Y[which(Sdata$y02_turn_hisp == 999999 | Sdata$y00_reg_hisp == 999999 | Sdata$y00_reg_hisp == 0)] <- NA

# Treated outcomes followed by control outcomes, with a matching indicator.
Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]
Ym <- c(Y1, Y0)
Trm <- rep(c(TRUE, FALSE), times = c(length(Y1), length(Y0)))

# ---- WH2data: unadjusted treated-vs-control comparison ----
# The t.test argument expressions are kept as written because they appear in
# the printed "data:" line.
cat("Top of results for WH2data \n")
tt <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(tt)
kb <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(kb)

RESPLAC["Hispanic Turnout 2002 WH2",
        c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")] <-
  c(tt$estimate[1], tt$estimate[2], tt$p.value, kb$ks.boot.pvalue)

# ---- Matched data: same comparison on the stacked matched sample ----
cat("Top of results AFTER matching \n")
tt <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(tt)
kb <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(kb)

RESPLAC["Hispanic Turnout 2002 AM",
        c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")] <-
  c(tt$estimate[1], tt$estimate[2], tt$p.value, kb$ks.boot.pvalue)

#cat("Hodges-Lehmann\n")
#cat("Using option exact=FALSE\n")
#a <- wilcox.exact(Ym[Trm], Ym[!Trm],exact=FALSE, paired=TRUE, conf.int=TRUE, conf.level=0.95)
#cat("H-L estimate:",a$estimate,"\n")
#cat("H-L CI:",a$conf.int,"\n")
#cat("H-L p.value:",a$p.value,"\n")

################################################################################
### Cross-sectional results (iii): NonHispanic Turnout in 2002 (as share of NonHispanic Registration in 2000) 
################################################################################

#BASELINE


# Outcome on the WH2 data: 2002 non-Hispanic turnout divided by 2000
# non-Hispanic registration.
#
# Both non-Hispanic columns are computed earlier in this script as
# (total - Hispanic). If either component carries the 999999 code (apparently
# a missing-value sentinel, as it is screened for throughout this script), the
# difference is tot - 999999 or 999999 - hisp, which does not equal 999999.
# Testing only the derived column therefore lets such rows through with
# meaningless values, so the component columns are screened as well. The
# original checks on the derived columns are kept.
# NOTE(review): if any such rows exist in the data, this changes the numbers
# relative to the unscreened version -- confirm against the published table.
wh2_component_missing <-
  WH2data$y02_turn_tot == 999999 | WH2data$y02_turn_hisp == 999999 |
  WH2data$y00_reg_tot == 999999 | WH2data$y00_reg_hisp == 999999

Y <- (WH2data$y02_turn_nhisp / WH2data$y00_reg_nhisp)
Y[which(wh2_component_missing |
          WH2data$y02_turn_nhisp == 999999 |
          WH2data$y00_reg_nhisp == 0 |
          WH2data$y00_reg_nhisp == 999999)] <- NA
Tr <- WH2data$Tr == 1

# Stack treated rows first, then controls, using the positions stored in m1.
TrWH2 <- Tr[c(m1$index.treated, m1$index.control)]
YWH2 <- Y[c(m1$index.treated, m1$index.control)]


# Same outcome on the matched data (Sdata), with the same component screening.
s_component_missing <-
  Sdata$y02_turn_tot == 999999 | Sdata$y02_turn_hisp == 999999 |
  Sdata$y00_reg_tot == 999999 | Sdata$y00_reg_hisp == 999999

Y <- (Sdata$y02_turn_nhisp / Sdata$y00_reg_nhisp)
Y[which(s_component_missing |
          Sdata$y00_reg_nhisp == 0 |
          Sdata$y00_reg_nhisp == 999999 |
          Sdata$y02_turn_nhisp == 999999)] <- NA

# Treated outcomes followed by control outcomes, with a matching indicator.
Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]

Ym <- c(Y1, Y0)
Trm <- c(rep(TRUE, length(Y1)), rep(FALSE, length(Y0)))


########################
## WH2data: Most naive
#######################
cat("Top of results for WH2data \n")
t <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(t)
ks <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(ks)

RESPLAC["NonHispanic Turnout 2002 WH2", "Mean Tr"] <- t$estimate[1]
RESPLAC["NonHispanic Turnout 2002 WH2", "Mean Co"] <- t$estimate[2]
RESPLAC["NonHispanic Turnout 2002 WH2", "Diff means p-val"] <- t$p.value
RESPLAC["NonHispanic Turnout 2002 WH2", "KS test p-val"] <- ks$ks.boot.pvalue


########################
## Final matching: matching across triplets on the stacked data
#######################

cat("Top of results AFTER matching \n")
t <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(t)
ks <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(ks)

RESPLAC["NonHispanic Turnout 2002 AM", "Mean Tr"] <- t$estimate[1]
RESPLAC["NonHispanic Turnout 2002 AM", "Mean Co"] <- t$estimate[2]
RESPLAC["NonHispanic Turnout 2002 AM", "Diff means p-val"] <- t$p.value
RESPLAC["NonHispanic Turnout 2002 AM", "KS test p-val"] <- ks$ks.boot.pvalue



################################################################################
### Cross-sectional results (iv): NonHispanic Registration in 2002 (as share of NHVAP) 
################################################################################

#BASELINE


# Outcome on the WH2 data: 2002 non-Hispanic registration divided by
# non-Hispanic VAP.
#
# Both columns are computed earlier in this script as differences
# (y02_reg_tot - y02_reg_hisp and vap - hvap). If either component carries the
# 999999 code (apparently a missing-value sentinel, as it is screened for
# throughout this script), the difference does not equal 999999. Testing only
# the derived column therefore lets such rows through, and a resulting
# negative ratio also escapes the "> 1" overrun filter. The component columns
# are screened as well; the original checks on the derived columns are kept.
# NOTE(review): if any such rows exist in the data, this changes the numbers
# relative to the unscreened version -- confirm against the published table.
wh2_component_missing <-
  WH2data$y02_reg_tot == 999999 | WH2data$y02_reg_hisp == 999999 |
  WH2data$vap == 999999 | WH2data$hvap == 999999

Y <- (WH2data$y02_reg_nhisp / WH2data$nhvap)
Y[which(wh2_component_missing |
          WH2data$y02_reg_nhisp == 999999 |
          WH2data$nhvap == 0 |
          WH2data$nhvap == 999999)] <- NA
Tr <- WH2data$Tr == 1

# Report the share of all rows whose ratio exceeds 1, then blank those rows.
print(rbind('Percent overruns', as.numeric(length(which(Y > 1)) / length(Y))))
Y[which(Y > 1)] <- NA

# Stack treated rows first, then controls, using the positions stored in m1.
TrWH2 <- Tr[c(m1$index.treated, m1$index.control)]
YWH2 <- Y[c(m1$index.treated, m1$index.control)]


# Same outcome on the matched data (Sdata), with the same component screening.
s_component_missing <-
  Sdata$y02_reg_tot == 999999 | Sdata$y02_reg_hisp == 999999 |
  Sdata$vap == 999999 | Sdata$hvap == 999999

Y <- (Sdata$y02_reg_nhisp / Sdata$nhvap)
Y[which(s_component_missing |
          Sdata$nhvap == 0 |
          Sdata$nhvap == 999999 |
          Sdata$y02_reg_nhisp == 999999)] <- NA

print(rbind('Percent overruns', as.numeric(length(which(Y > 1)) / length(Y))))
Y[which(Y > 1)] <- NA

# Treated outcomes followed by control outcomes, with a matching indicator.
Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]

Ym <- c(Y1, Y0)
Trm <- c(rep(TRUE, length(Y1)), rep(FALSE, length(Y0)))


########################
## WH2data: Most naive
#######################
cat("Top of results for WH2data \n")
t <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(t)
ks <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(ks)

RESPLAC["NonHispanic Registration 2002 WH2", "Mean Tr"] <- t$estimate[1]
RESPLAC["NonHispanic Registration 2002 WH2", "Mean Co"] <- t$estimate[2]
RESPLAC["NonHispanic Registration 2002 WH2", "Diff means p-val"] <- t$p.value
RESPLAC["NonHispanic Registration 2002 WH2", "KS test p-val"] <- ks$ks.boot.pvalue


########################
## Final matching: matching across triplets on the stacked data
#######################

cat("Top of results AFTER matching \n")
t <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(t)
ks <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(ks)

RESPLAC["NonHispanic Registration 2002 AM", "Mean Tr"] <- t$estimate[1]
RESPLAC["NonHispanic Registration 2002 AM", "Mean Co"] <- t$estimate[2]
RESPLAC["NonHispanic Registration 2002 AM", "Diff means p-val"] <- t$p.value
RESPLAC["NonHispanic Registration 2002 AM", "KS test p-val"] <- ks$ks.boot.pvalue

#cat("Hodges-Lehmann\n")
#cat("Using option exact=FALSE\n")
#a <- wilcox.exact(Ym[Trm], Ym[!Trm],exact=FALSE, paired=TRUE, conf.int=TRUE, conf.level=0.95)
#cat("H-L estimate:",a$estimate,"\n")
#cat("H-L CI:",a$conf.int,"\n")
#cat("H-L p.value:",a$p.value,"\n")


# Dump the full results matrix to the console.
print(RESPLAC)


#####
####################### LATEX TABLES ##################
library(xtable)
library(stringr)

# Split the results into the baseline rows (first four, labelled WH2) and the
# matched rows (last four, labelled AM), relying on the row order in which
# RESPLAC was built, then give both tables the same short row labels.
baseline <- RESPLAC[1:4, ]
matched <- RESPLAC[5:8, ]

short_labels <- c('Hispanic Turnout', 'Hispanic Registration', 'Non Hispanic Turnout', 'Non Hispanic Registration')
rownames(baseline) <- short_labels
rownames(matched) <- short_labels

# The xtable objects are printed explicitly: a bare top-level xtable(...) is
# only shown through auto-printing, which does not happen when this file is
# run via source() with its default settings, so the LaTeX would be lost.
print('--------------------')
print('Baseline')
print('--------------------')
print(xtable(baseline, digits = c(1, 3, 3, 2, 2)))

print('--------------------')
print('Matched')
print('--------------------')
print(xtable(matched, digits = c(1, 3, 3, 2, 2)))
                                              
################## LATEX TABLES ##################